library(tidyverse)
library(readxl)
library(broom)
library(gt)

Reading in the data

In this R Markdown file, the Excel file that is read in is called analytic_data.xlsx and the data frame is called EXAMPLE_DATA. Replace these with the names of the file and data frame you wish to use.

# Read the workbook and convert every character column to a factor so that
# the grouping variables used by the tests below are treated as categorical.
# across(where(...)) replaces the superseded mutate_if().
EXAMPLE_DATA <- read_excel("analytic_data.xlsx") %>%
  mutate(across(where(is.character), as.factor))

In all of the code below, you will need to replace EXAMPLE_DATA with the name of your data frame. You will need to use the appropriate variable names.

Two sample t-test

Without assuming equal variances

# Welch two-sample t-test: compares the mean of the numerical variable between
# the two groups without pooling the variances (var.equal = FALSE is the
# default and is spelled out here for clarity).
t.test(
  NUMERICAL_VARIABLE1 ~ CATEGORICAL_VARIABLE1,
  data = EXAMPLE_DATA,
  var.equal = FALSE,
  conf.level = 0.95
)
## 
##  Welch Two Sample t-test
## 
## data:  NUMERICAL_VARIABLE1 by CATEGORICAL_VARIABLE1
## t = -0.7219, df = 12.804, p-value = 0.4833
## alternative hypothesis: true difference in means between group A and group B is not equal to 0
## 95 percent confidence interval:
##  -2.957998  1.477998
## sample estimates:
## mean in group A mean in group B 
##            3.97            4.71

Assuming equal variances

# Classical two-sample t-test: var.equal = TRUE pools the two group variances.
t.test(
  NUMERICAL_VARIABLE1 ~ CATEGORICAL_VARIABLE1,
  data = EXAMPLE_DATA,
  var.equal = TRUE,
  conf.level = 0.95
)
## 
##  Two Sample t-test
## 
## data:  NUMERICAL_VARIABLE1 by CATEGORICAL_VARIABLE1
## t = -0.7219, df = 18, p-value = 0.4796
## alternative hypothesis: true difference in means between group A and group B is not equal to 0
## 95 percent confidence interval:
##  -2.893602  1.413602
## sample estimates:
## mean in group A mean in group B 
##            3.97            4.71

A way to present the output

# Fit the Welch two-sample t-test once and keep the result for tabulation.
independentsamples <- t.test(
  NUMERICAL_VARIABLE1 ~ CATEGORICAL_VARIABLE1,
  data = EXAMPLE_DATA,
  conf.level = 0.95
)

# Turn the test result into a one-row table showing the estimate, its
# confidence interval (merged into a single "low to high" column) and the
# p-value, then format it with gt.
independentsamples %>%
  tidy() %>%
  select(estimate, conf.low, conf.high, p.value) %>%
  gt() %>%
  fmt_number(columns = c(estimate, conf.low, conf.high), decimals = 2) %>%
  fmt_number(columns = p.value, decimals = 3) %>%
  cols_merge_range(col_begin = conf.low, col_end = conf.high, sep = " to ") %>%
  cols_align(align = "center", columns = everything()) %>%
  cols_label(
    estimate = "Estimate of the difference of means",
    conf.low = "95% CI ",
    p.value = "P-value"
  )
Estimate of the difference of means 95% CI P-value
−0.74 −2.96 to 1.48 0.483

Wilcoxon rank sum test

# Wilcoxon rank sum test comparing the two numerical variables as independent
# samples. conf.int = TRUE additionally reports a confidence interval and an
# estimate of the location difference. TRUE is written out because T is an
# ordinary variable that can be reassigned.
wilcox.test(
  x = EXAMPLE_DATA$NUMERICAL_VARIABLE1,
  y = EXAMPLE_DATA$NUMERICAL_VARIABLE2,
  conf.int = TRUE
)
## Warning in wilcox.test.default(x = EXAMPLE_DATA$NUMERICAL_VARIABLE1, y =
## EXAMPLE_DATA$NUMERICAL_VARIABLE2, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = EXAMPLE_DATA$NUMERICAL_VARIABLE1, y =
## EXAMPLE_DATA$NUMERICAL_VARIABLE2, : cannot compute exact confidence intervals
## with ties
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  EXAMPLE_DATA$NUMERICAL_VARIABLE1 and EXAMPLE_DATA$NUMERICAL_VARIABLE2
## W = 186, p-value = 0.7148
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
##  -1.200059  1.099938
## sample estimates:
## difference in location 
##              -0.199919

Paired t-test

# Paired t-test: the two numerical variables are treated as matched
# measurements, so the test is on the mean of the within-pair differences.
t.test(
  x = EXAMPLE_DATA$NUMERICAL_VARIABLE1,
  y = EXAMPLE_DATA$NUMERICAL_VARIABLE2,
  paired = TRUE
)
## 
##  Paired t-test
## 
## data:  EXAMPLE_DATA$NUMERICAL_VARIABLE1 and EXAMPLE_DATA$NUMERICAL_VARIABLE2
## t = 0.40192, df = 19, p-value = 0.6922
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -0.82048  1.21048
## sample estimates:
## mean difference 
##           0.195

Wilcoxon signed rank test

# Wilcoxon signed rank test on the paired measurements. conf.int = TRUE also
# reports a confidence interval and the (pseudo)median of the differences.
# TRUE is written out because T is an ordinary variable that can be reassigned.
wilcox.test(
  x = EXAMPLE_DATA$NUMERICAL_VARIABLE1,
  y = EXAMPLE_DATA$NUMERICAL_VARIABLE2,
  paired = TRUE,
  conf.int = TRUE
)
## Warning in wilcox.test.default(x = EXAMPLE_DATA$NUMERICAL_VARIABLE1, y =
## EXAMPLE_DATA$NUMERICAL_VARIABLE2, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = EXAMPLE_DATA$NUMERICAL_VARIABLE1, y =
## EXAMPLE_DATA$NUMERICAL_VARIABLE2, : cannot compute exact confidence interval
## with ties
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  EXAMPLE_DATA$NUMERICAL_VARIABLE1 and EXAMPLE_DATA$NUMERICAL_VARIABLE2
## V = 111.5, p-value = 0.8227
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
##  -0.9500783  1.2500660
## sample estimates:
## (pseudo)median 
##      0.1500488

Correlation

# Pearson correlation between the two numerical variables, with a test of
# zero correlation and a confidence interval for the coefficient.
cor.test(
  ~ NUMERICAL_VARIABLE1 + NUMERICAL_VARIABLE2,
  data = EXAMPLE_DATA,
  method = "pearson"
)
## 
##  Pearson's product-moment correlation
## 
## data:  NUMERICAL_VARIABLE1 and NUMERICAL_VARIABLE2
## t = 1.7861, df = 18, p-value = 0.09094
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.06581448  0.70882202
## sample estimates:
##       cor 
## 0.3880067

Fisher’s exact test

# Fisher's exact test of association between the two categorical variables.
fisher.test(
  x = EXAMPLE_DATA$CATEGORICAL_VARIABLE1,
  y = EXAMPLE_DATA$CATEGORICAL_VARIABLE2
)
## 
##  Fisher's Exact Test for Count Data
## 
## data:  EXAMPLE_DATA$CATEGORICAL_VARIABLE1 and EXAMPLE_DATA$CATEGORICAL_VARIABLE2
## p-value = 0.06978
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.007870555 1.133635839
## sample estimates:
## odds ratio 
##  0.1226533

Chi-squared test

# Pearson chi-squared test of association between the two categorical
# variables, cross-tabulated from the two factor columns.
chisq.test(
  EXAMPLE_DATA$CATEGORICAL_VARIABLE1,
  EXAMPLE_DATA$CATEGORICAL_VARIABLE2
)
## Warning in chisq.test(EXAMPLE_DATA$CATEGORICAL_VARIABLE1,
## EXAMPLE_DATA$CATEGORICAL_VARIABLE2): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  EXAMPLE_DATA$CATEGORICAL_VARIABLE1 and EXAMPLE_DATA$CATEGORICAL_VARIABLE2
## X-squared = 3.2323, df = 1, p-value = 0.0722

© Statistical Consulting Centre, University of Melbourne, 2023